package com.shujia.opt

import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Demonstrates map-side (broadcast) joins in Spark SQL, both through the
 * DataFrame API (`Dataset.hint("broadcast")`) and through a SQL hint comment
 * (`/*+ BROADCAST(alias) */`). Broadcasting the smaller table avoids a
 * shuffle: every executor receives a full copy of the broadcast side and
 * joins locally.
 *
 * Input files (CSV, no header):
 *   - spark/data/stu/students.txt: id, name, age, gender, clazz
 *   - spark/data/stu/score.txt:    sid, cId, sco
 *
 * The program never exits on its own; it parks the main thread at the end so
 * the Spark Web UI (http://localhost:4040) stays available for inspecting
 * the executed query plans.
 */
object Demo07SparkSqlMapJoin {

  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local[4]")
      .appName("Demo07SparkSqlMapJoin")
      // Keep shuffle parallelism low for a small local demo dataset.
      .config("spark.sql.shuffle.partitions", "2")
      .getOrCreate()

    val student: DataFrame = spark.read
      .format("csv")
      .schema("id STRING , name STRING ,age INT ,gender STRING ,clazz STRING")
      .load("spark/data/stu/students.txt")

    val score: DataFrame = spark.read
      .format("csv")
      .schema("sid STRING ,cId STRING , sco INT")
      .load("spark/data/stu/score.txt")

    import spark.implicits._

    // DataFrame API: .hint("broadcast") marks `score` as the broadcast side,
    // so the join is executed as a map join (BroadcastHashJoin) with no shuffle.
    // Column casing matches the declared schemas (`sid` in score, `id` in student);
    // Spark's resolver is case-insensitive by default, but matching the schema
    // avoids relying on that setting.
    student.join(score.hint("broadcast"), $"sid" === $"id").show(10000000)

    student.createOrReplaceTempView("student")
    score.createOrReplaceTempView("score")

    // SQL API: the /*+ BROADCAST(a) */ hint requests that alias `a` (score)
    // be broadcast, producing the same map-join plan as above.
    spark.sql(
      """
        |
        |select /*+broadcast(a)  */ * from score as a join student as b on a.sid=b.id
        |
      """.stripMargin).show(1000000)

    // Block forever so the Spark UI stays up for plan inspection.
    // Sleeping instead of spinning keeps the loop from pinning a CPU core.
    while (true) {
      Thread.sleep(10000)
    }

  }
}
